iCellR

Single (i) Cell R package (iCellR) is an interactive R package to work with high-throughput single cell sequencing technologies (i.e scRNA-seq, scVDJ-seq and CITE-seq). The package that allows scientists unprecedented flexibility at every step of the analysis pipeline, including normalization, clustering, dimensionality reduction, imputation, visualization, and so on. iCellR allows users to design both unsupervised and supervised models to best suit their research. In addition, iCellR provides 2D and 3D interactive visualizations, differential expression analysis, filters based on cells, genes and clusters, data merging, normalizing for dropouts and filling them with imputation methods, batch differences, pathway analysis and tools to find marker genes for clusters and conditions, predict cell types and pseudotime analysis.

How to install iCellR

library(devtools)
install_github("rezakj/iCellR")

How to use iCellR for analyzing scRNA-seq data

Download sample data

# set your working directory 
setwd("/your/download/directory")

# save the URL as an object
sample.file.url = "https://s3-us-west-2.amazonaws.com/10x.files/samples/cell/pbmc3k/pbmc3k_filtered_gene_bc_matrices.tar.gz"

# download the file
download.file(url = sample.file.url, 
     destfile = "pbmc3k_filtered_gene_bc_matrices.tar.gz", 
     method = "auto")  

# unzip the file. 
untar("pbmc3k_filtered_gene_bc_matrices.tar.gz") 

Load iCellR package

library(iCellR)

Read 10x data

my.data <- load10x("filtered_gene_bc_matrices/hg19/",gene.name = "geneSymbol")

Look at the first few lines and columns of the sample data. And count the rows and columns.

head(my.data)[1:5]
##              AAACATACAACCAC-1 AAACATTGAGCTAC-1 AAACATTGATCAGC-1
## MIR1302.10                  0                0                0
## FAM138A                     0                0                0
## OR4F5                       0                0                0
## RP11.34P13.7                0                0                0
## RP11.34P13.8                0                0                0
## AL627309.1                  0                0                0
##              AAACCGTGCTTCCG-1 AAACCGTGTATGCG-1
## MIR1302.10                  0                0
## FAM138A                     0                0
## OR4F5                       0                0
## RP11.34P13.7                0                0
## RP11.34P13.8                0                0
## AL627309.1                  0                0
dim(my.data)
## [1] 32738  2700

You can devide the sample into 3 equal parts. Assuming that you have three samples.

sample1 <- my.data[1:900]
sample2 <- my.data[901:1800]
sample3 <- my.data[1801:2700]

Merge samples

Conditions in iCellR are set in the header of the data and are separated by an underscore (_)

my.data <- data.aggregation(samples = c("sample1","sample2","sample3"),
                            condition.names = c("WT","KO","Ctrl"))
head(my.data)[1:5]
##          WT_AAACATACAACCAC.1 WT_AAACATTGAGCTAC.1 WT_AAACATTGATCAGC.1
## A1BG                       0                   0                   1
## A1BG.AS1                   0                   0                   0
## A1CF                       0                   0                   0
## A2M                        0                   0                   0
## A2M.AS1                    0                   0                   0
## A2ML1                      0                   0                   0
##          WT_AAACCGTGCTTCCG.1 WT_AAACCGTGTATGCG.1
## A1BG                       0                   0
## A1BG.AS1                   0                   0
## A1CF                       0                   0
## A2M                        0                   0
## A2M.AS1                    0                   0
## A2ML1                      0                   0

Make an iCellR object

my.obj <- make.obj(my.data)

Look at the object

my.obj
## [1] "An object of class iCellR version: 0.99.0"                                     
## [2] "Raw/original data dimentions (rows,columns): 32738,2700"                       
## [3] "Data conditions in raw data: Ctrl,KO,WT (900,900,900)"                         
## [4] "Columns names: WT_AAACATACAACCAC.1,WT_AAACATTGAGCTAC.1,WT_AAACATTGATCAGC.1 ..."
## [5] "Row names: A1BG,A1BG.AS1,A1CF ..."

Run basic QC

my.obj <- qc.stats(my.obj, 
  which.data = "raw.data", 
  mito.genes = "defult", 
  s.phase.genes = s.phase, 
  g2m.phase.genes = g2m.phase)
  • Ploting QC results
stats.plot(my.obj,
    plot.type = "all.in.one",
    out.name = "UMI-plot",
    interactive = FALSE,
    cell.color = "slategray3", 
    cell.size = 1, 
    cell.transparency = 0.5,
    box.color = "red",
    box.line.col = "green")

# Scatter plots
stats.plot(my.obj, plot.type = "point.mito.umi", out.name = "mito-umi-plot",interactive = F)
stats.plot(my.obj, plot.type = "point.gene.umi", out.name = "gene-umi-plot",interactive = F)

Filtering cells

iCellR allows you to filter based on library sizes (UMIs), number of genes per cell, percent mitochondrial content, one or more genes, and cell ids.

my.obj <- cell.filter(my.obj,
    min.mito = 0,
    max.mito = 0.05,
    min.genes = 200,
    max.genes = 2400,
    min.umis = 0,
    max.umis = Inf)
## [1] "cells with min mito ratio of 0 and max mito ratio of 0.05 were filtered."
## [1] "cells with min genes of 200 and max genes of 2400 were filtered."
## [1] "No UMI number filter"
## [1] "No cell filter by provided gene/genes"
## [1] "No cell id filter"
## [1] "filters_set.txt file has beed generated and includes the filters set for this experiment."
  • Down sampling

This step is optional and is for having the same number of cells for each condition.

# optional
# my.obj <- down.sample(my.obj)
#[1] "From"
#[1] "Data conditions: Ctrl,KO,WT (877,877,883)"
#[1] "to"
#[1] "Data conditions: Ctrl,KO,WT (877,877,877)"

Normalizing data

You have a few options to normalize your data based on your study. You can also normalize your data using tools other than iCellR and import your data to iCellR. We recommend “ranked.glsf” normalization for most single cell studies. This normalization is great for fixing matrixes with lots of zeros and because it’s geometric it is great for fixing for batch effects, as long as all the data is aggregated into one file (to aggregate your data see “aggregating data” section above).

  • Choose from the following methods:
    • deseq (best for bulk RNA-Seq)
    • ranked.deseq (best for scRNA-Seq)
    • global.glsf (best for bulk RNA-Seq)
    • ranked.glsf (best for scRNA-Seq)
    • rpm (best for bulk RNA-Seq)
    • spike.in

For spike.in normalization you need to provide the spike.in vlaues to normalize the data with.

my.obj <- norm.data(my.obj, 
     norm.method = "ranked.glsf",
     top.rank = 500)

Scaling data

my.obj <- data.scale(my.obj)

Calculating gene stats

my.obj <- gene.stats(my.obj, which.data = "main.data")

Make gene model for clustering

It’s best to always to avoid global clustering and use a set of model genes. In bulk RNA-seq data it is very common to cluster the samples based on top 500 genes ranked by base mean, this is to reduce the noise. In scRNA-seq data, it’s great to do so as well.

make.gene.model(my.obj, 
    dispersion.limit = 1.5, 
    base.mean.rank = 500, 
    no.mito.model = T,
    no.cell.cycle = T,
    mark.mito = T, 
    interactive = F,
    out.name = "gene.model")
## [1] "my_model_genes.txt file is generated, which can be used for clustering."

Run PCA

my.obj <- run.pca(my.obj, 
                  clust.method = "gene.model", 
                  gene.list = readLines("my_model_genes.txt"), 
                  batch.norm = F)

# Re-define model genes (run this if you have real samples)
# find.dim.genes(my.obj, dims = 1:10, top.pos = 20, top.neg = 10)
 
# Second round PCA and batch correction (run this if you have real sample)
#my.obj <- run.pca(my.obj, 
#                  clust.method = "gene.model", 
#                  gene.list = readLines("my_model_PC_genes.txt"),
#                  batch.norm = T)
# Visualize standard deviations for PCs 
opt.pcs.plot(my.obj)

Cluster the data

Here we cluster the first 10 PC dimensions of the data. You have the option of clustering your data based on the following methods.

  • Clustering methods:
    • ward.D
    • ward.D2
    • single
    • complete
    • average
    • mcquitty
    • median
    • centroid
    • kmeans
  • Distance methods:
    • euclidean
    • maximum
    • manhattan
    • canberra
    • binary
    • minkowski
  • Indexing methods
    • kl
    • ch
    • hartigan
    • ccc
    • scott
    • marriot
    • trcovw
    • tracew
    • friedman
    • rubin
    • cindex
    • db
    • silhouette
    • duda
    • pseudot2
    • beale
    • ratkowsky
    • ball
    • ptbiserial
    • gap
    • frey
    • mcclain
    • gamma
    • gplus
    • tau
    • dunn
    • hubert
    • sdindex
    • dindex
    • sdbw
    • all
my.obj <- run.clustering(my.obj, 
    clust.method = "kmeans", 
    dist.method = "euclidean",
    index.method = "silhouette",
    max.clust = 25,
    min.clust = 2,
    dims = 1:10)

Dimentionality reduction

# tSNE
my.obj <- run.pc.tsne(my.obj, dims = 1:10)

# UMAP
my.obj <- run.umap(my.obj, dims = 1:10, method = "umap-learn") 

# Diffusion map
 my.obj <- run.diffusion.map(my.obj, dims = 1:10)

Create avarge expression per cluster

my.obj <- clust.avg.exp(my.obj)

Data imputaion

my.obj <- run.impute(my.obj)

Save object

save(my.obj, file = "my.obj.Robj")

Visualizing clusters and conditions

  • tSNE
# clusters (tSNE)
cluster.plot(my.obj,
    plot.type = "tsne",
    interactive = F)

# Conditions (tSNE)
cluster.plot(my.obj,
    plot.type = "tsne",
    col.by = "conditions",
    interactive = F)

  • UMAP
# clusters (UMAP)
cluster.plot(my.obj,
    plot.type = "umap",
    interactive = F)

# Conditions (UMAP)
cluster.plot(my.obj,
    plot.type = "umap",
    col.by = "conditions",
    interactive = F)

  • Diffusion map
# clusters (diffusion)
cluster.plot(my.obj,
    plot.type = "diffusion",
    interactive = F)

# Conditions (diffusion)
cluster.plot(my.obj,
    plot.type = "diffusion",
    col.by = "conditions",
    interactive = F)

Cell frequencies in clusters and conditions

# bar
clust.cond.info(my.obj, plot.type = "bar", normalize = T)

# pie
clust.cond.info(my.obj, plot.type = "pie", normalize = T)
## [1] "clust_cond_freq_info.txt file has beed generated."
## [1] "clust_cond_freq_info.txt file has beed generated."

Find marker genes for clusters

marker.genes <- findMarkers(my.obj,
    fold.change = 2,
    padjval = 0.1)
head(marker.genes)
##                 baseMean    baseSD AvExpInCluster AvExpInOtherClusters
## GZMK          0.41991651 1.6966460      2.4363833           0.11537810
## LAG3          0.04546080 0.2827279      0.2468928           0.01503939
## CD8A          0.21021541 0.7086838      1.0163009           0.08847574
## GZMH          0.47430404 1.9741941      2.2865498           0.20060827
## CCL5          2.62409548 6.5488179     12.3863881           1.14973789
## RP11.222K16.2 0.02626753 0.2084994      0.1087713           0.01380733
##               foldChange log2FoldChange         pval         padj clusters
## GZMK           21.116513       4.400300 7.120249e-27 1.478164e-23        1
## LAG3           16.416407       4.037066 1.168625e-10 2.410873e-07        1
## CD8A           11.486775       3.521902 8.226752e-28 1.708696e-24        1
## GZMH           11.398084       3.510719 1.184154e-19 2.452383e-16        1
## CCL5           10.773228       3.429379 2.251589e-72 4.687808e-69        1
## RP11.222K16.2   7.877796       2.977792 1.436435e-05 2.920273e-02        1
##                        gene  cluster_1  cluster_2 cluster_3   cluster_4
## GZMK                   GZMK  2.4363833 0.05291050   0.00000 0.075901225
## LAG3                   LAG3  0.2468928 0.00000000   0.00000 0.007326283
## CD8A                   CD8A  1.0163009 0.03154194   0.00000 0.029272097
## GZMH                   GZMH  2.2865498 0.02629519   0.00000 0.034177635
## CCL5                   CCL5 12.3863881 0.21033406  33.79822 0.250237643
## RP11.222K16.2 RP11.222K16.2  0.1087713 0.00000000   0.00000 0.000000000
##                cluster_5   cluster_6   cluster_7
## GZMK          0.40592901 0.127154863 0.027465519
## LAG3          0.01062049 0.026494853 0.000000000
## CD8A          0.11671291 0.141884155 0.017431855
## GZMH          2.63214509 0.016212854 0.027473883
## CCL5          8.84515165 0.602894477 0.122993436
## RP11.222K16.2 0.13921096 0.008496299 0.003296331

Visualizing gene expressions

MyGenes <- unique(top.markers(marker.genes, topde = 10, min.base.mean = 0.2))
MyGenes
##  [1] "GZMK"      "CD8A"      "GZMH"      "CCL5"      "KLRG1"    
##  [6] "CD8B"      "LYAR"      "CST7"      "GZMA"      "NKG7"     
## [11] "IGLL5"     "LINC00926" "CD79A"     "TCL1A"     "MS4A1"    
## [16] "CD79B"     "HLA.DQB1"  "HLA.DQA1"  "HLA.DQA2"  "HVCN1"    
## [21] "PPBP"      "PF4"       "GPX1"      "TAGLN2"    "CALM3"    
## [26] "OAZ1"      "ACTB"      "MYL6"      "ITM2B"     "S100A8"   
## [31] "S100A9"    "LGALS2"    "CD14"      "MS4A6A"    "LYZ"      
## [36] "FCN1"      "GRN"       "CDA"       "BLVRB"     "GNLY"     
## [41] "GZMB"      "SPON2"     "FGFBP2"    "PRF1"      "CCL4"     
## [46] "CD247"     "LEF1"      "AQP3"      "CCR7"      "PRKCQ.AS1"
## [51] "TRAT1"     "FLT3LG"    "PIK3IP1"   "IL7R"      "LDHB"     
## [56] "TCF7"      "MS4A7"     "FCGR3A"    "IFITM3"    "LST1"     
## [61] "HCK"       "RHOC"      "SERPINA1"  "IFI30"     "AIF1"     
## [66] "FCER1G"

# main data 
gene.plot(my.obj, gene = "MS4A1", 
    plot.type = "scatterplot",
    interactive = F,
    data.type = "main")

# imputed data 
gene.plot(my.obj, gene = "MS4A1", 
    plot.type = "scatterplot",
    interactive = F,
    data.type = "imputed")

# imputed data clusters
gene.plot(my.obj, gene = "MS4A1", 
    box.pval = "sig.signs",
    col.by = "clusters",
    plot.type = "boxplot",
    interactive = F,
    data.type = "imputed")

# imputed data conditions
gene.plot(my.obj, gene = "MS4A1", 
    box.pval = "sig.values",
    col.by = "conditions",
    plot.type = "boxplot",
    interactive = F,
    data.type = "imputed")

# imputed data clusters
gene.plot(my.obj, gene = "MS4A1", 
    col.by = "clusters",
    plot.type = "barplot",
    interactive = F,
    out.name = "bar_plot", 
    data.type = "imputed")

# imputed data conditions
gene.plot(my.obj, gene = "MS4A1", 
    col.by = "conditions",
    plot.type = "barplot",
    interactive = F,
    out.name = "bar_plot",
    data.type = "imputed")

# Heat map on main data (gene list as in MyGenes object made above)
heatmap.gg.plot(my.obj, gene = MyGenes, 
                interactive = F, 
                cluster.by = "clusters")

# Heat map on imputed data 
heatmap.gg.plot(my.obj, gene = MyGenes, 
                interactive = F, 
                cluster.by = "clusters",
                data.type = "imputed")

genelist = c("PPBP","LYZ","MS4A1","GNLY","LTB","NKG7","IFITM2","CD14","S100A9")

for(i in genelist){
    MyPlot <- gene.plot(my.obj, gene = i, 
        interactive = F,
        plot.data.type = "umap",
        cell.transparency = 1)
    i <- gsub("-",".",i)
    eval(call("<-", as.name(i), MyPlot))
}

## 
library(gridExtra)
grid.arrange(PPBP,LYZ,MS4A1,GNLY,LTB,NKG7,IFITM2,CD14,S100A9)

genelist = c("PPBP","LYZ","MS4A1","GNLY","LTB","NKG7","IFITM2","CD14","S100A9")

for(i in genelist){
    MyPlot <- gene.plot(my.obj, gene = i, 
        interactive = F,
        plot.data.type = "tsne",
        cell.transparency = 1)
    i <- gsub("-",".",i)
    eval(call("<-", as.name(i), MyPlot))
}

## 
library(gridExtra)
grid.arrange(PPBP,LYZ,MS4A1,GNLY,LTB,NKG7,IFITM2,CD14,S100A9)

QC on clusters

clust.stats.plot(my.obj, plot.type = "box.mito", interactive = F)
clust.stats.plot(my.obj, plot.type = "box.gene", interactive = F)

Cell type prediction using ImmGen

## get marker genes for cluster 2
Cluster = 2
MyGenes <- unique(top.markers(marker.genes, topde = 40, min.base.mean = 0.2, cluster = Cluster))
MyGenes
##  [1] "IGLL5"     "LINC00926" "CD79A"     "TCL1A"     "MS4A1"    
##  [6] "CD79B"     "HLA.DQB1"  "HLA.DQA1"  "HLA.DQA2"  "HVCN1"    
## [11] "CD74"      "HLA.DMB"   "HLA.DRA"   "HLA.DMA"   "HLA.DPB1" 
## [16] "HLA.DRB1"  "HLA.DRB5"  "HLA.DPA1"  "SNX2"      "CD37"     
## [21] "LY86"      "NCF1"      "FAIM3"     "SNHG7"     "ISG20"    
## [26] "FAM26F"    "CXCR4"     "SEC62"     "RCSD1"     "TMEM243"  
## [31] "POLD4"     "LAPTM5"    "DCK"       "EZR"       "PAPOLA"   
## [36] "POU2F2"    "RNASEH2B"
# ImmGen dot plot
imm.gen(immgen.data = "rna", gene = MyGenes, plot.type = "point.plot")
imm.gen(immgen.data = "uli.rna", gene = MyGenes, plot.type = "point.plot", top.cell.types = 50)

As seen in the dot plots the B cells are in the top, suggesting that cluster 2 is B cells.

# ImmGen dot plot
imm.gen(immgen.data = "rna", gene = MyGenes, plot.type = "heatmap")
imm.gen(immgen.data = "uli.rna", gene = MyGenes, plot.type = "heatmap")

As seen in the heatmaps the B cells are more expressed for these genes.

Differential Expression Analysis

# Between clusters 
diff.res <- diff.exp(my.obj, de.by = "clusters", cond.1 = c(1,4), cond.2 = c(2))
diff.res1 <- as.data.frame(diff.res)
diff.res1 <- subset(diff.res1, padj < 0.05)
head(diff.res1)
##               baseMean          1_4           2   foldChange
## ABI3        0.21809887 2.839549e-01 0.054036489   0.19029955
## AC006369.2  0.02330890 3.266529e-02 0.000000000   0.00000000
## AC079767.4  0.05360692 9.323288e-04 0.184831350 198.24696396
## AC092580.4  0.06657490 9.184304e-02 0.003626185   0.03948241
## AC093673.5  0.04228509 5.724388e-02 0.005019326   0.08768318
## ACTB       17.26037308 2.050377e+01 9.180343474   0.44773940
##            log2FoldChange         pval         padj
## ABI3            -2.393656 9.582075e-10 1.381160e-05
## AC006369.2           -Inf 2.449500e-06 3.457715e-02
## AC079767.4       7.631155 1.004322e-09 1.447328e-05
## AC092580.4      -4.662646 1.290914e-10 1.868082e-06
## AC093673.5      -3.511556 1.230512e-07 1.755818e-03
## ACTB            -1.159269 4.796900e-94 7.126275e-90
## Between conditions  
# diff.res <- diff.exp(my.obj, de.by = "conditions", cond.1 = c("WT"), cond.2 = c("KO"))

## Betwen conditions in the same cluster
# diff.res <- diff.exp(my.obj, de.by = "clustBase.condComp", cond.1 = c("WT"), cond.2 = c("KO"), base.cond = 1)

## Between clusters in the same condition
# diff.res <- diff.exp(my.obj, de.by = "condBase.clustComp", cond.1 = c(1), cond.2 = c(2), base.cond = "WT")

Volcano and MA plots

# Volcano Plot 
volcano.ma.plot(diff.res,
    sig.value = "pval",
    sig.line = 0.05,
    plot.type = "volcano",
    interactive = F)

# MA Plot
volcano.ma.plot(diff.res,
    sig.value = "pval",
    sig.line = 0.05,
    plot.type = "ma",
    interactive = F)

Data manipulation

# let's say you  want to merge cluster 3 and 2.
my.obj <- change.clust(my.obj, change.clust = 3, to.clust = 2)

# to reset to the original clusters run this.
my.obj <- change.clust(my.obj, clust.reset = T)

# you can also re-name the cluster numbers to cell types. Remember to reset after this so you can ran other analysis. 
my.obj <- change.clust(my.obj, change.clust = 7, to.clust = "B Cell")

# Let's say for what ever reason you want to remove acluster, to do so run this.
my.obj <- clust.rm(my.obj, clust.to.rm = 1)

Cell gating

# first make a plot to gate 
my.plot <- gene.plot(my.obj, gene = "GNLY", 
  plot.type = "scatterplot",
  clust.dim = 2,
  interactive = F)

# gate and download cell ids. 
cell.gating(my.obj, my.plot = my.plot)

# once the cell ids are download (cellGating.txt), you can assign a cluster number to them. 
my.obj <- gate.to.clust(my.obj, my.gate = "cellGating.txt", to.clust = 10)

Pseudotime analysis

MyGenes <-  unique(top.markers(marker.genes, topde = 10, min.base.mean = 0.2))
#
pseudotime.tree(my.obj,
   marker.genes = MyGenes,
   type = "unrooted",
   clust.method = "complete")
# 
pseudotime.tree(my.obj,
   marker.genes = MyGenes,
   type = "jitter",
   clust.method = "complete")

CITE-Seq

Read RNA data

RNA.data <- read.delim("CITE_RNA.tsv",header=TRUE)
rownames(RNA.data) <- RNA.data$gene
RNA.data <- RNA.data[,-1]
# if you had multiple data you can marge them like this
#RNA.data <- data.aggregation(samples = c("sample1","sample2"), condition.names = c("KO","WT"))

Read ADT data

adt.data <- read.delim("CITE_ADT.tsv",header=TRUE)
rownames(adt.data) <- adt.data$gene
adt.data <- adt.data[,-1]
# if you had multiple data you can marge them like this
#adt.data <- data.aggregation(samples = c("sample1","sample2"), condition.names = c("KO","WT"))

Make iCellR object

my.obj <- make.obj(RNA.data)
my.obj <- add.adt(my.obj, adt.data = adt.data)
# look at your object
my.obj
## [1] "An object of class iCellR version: 0.99.0"                            
## [2] "Raw/original data dimentions (rows,columns): 20501,8617"              
## [3] "Data conditions: no conditions/single sample"                         
## [4] "Columns names: CTGTTTACACCGCTAG,CTCTACGGTGTGGCTC,AGCAGCCAGGCTCATT ..."
## [5] "Row names: A1BG,A1BG-AS1,A1CF ..."
# look at ADTs
head(my.obj@adt.raw)[1:5]
##            CTGTTTACACCGCTAG CTCTACGGTGTGGCTC AGCAGCCAGGCTCATT
## ADT_CD3                  60               52               89
## ADT_CD4                  72               49              112
## ADT_CD8                  76               59               61
## ADT_CD45RA              575             3943              682
## ADT_CD56                 64               68               87
## ADT_CD16                161              107              117
##            GAATAAGAGATCCCAT GTGCATAGTCATGCAT
## ADT_CD3                  55               63
## ADT_CD4                  66               80
## ADT_CD8                  56               94
## ADT_CD45RA              378              644
## ADT_CD56                 58              104
## ADT_CD16                 82              168

Perform RNA data analysis as above

my.obj <- qc.stats(my.obj, 
  which.data = "raw.data", 
  mito.genes = "defult", 
  s.phase.genes = s.phase, 
  g2m.phase.genes = g2m.phase)

# 
stats.plot(my.obj,
    plot.type = "all.in.one",
    out.name = "UMI-plot",
    interactive = FALSE,
    cell.color = "slategray3", 
    cell.size = 1, 
    cell.transparency = 0.5,
    box.color = "red",
    box.line.col = "green")
# 
my.obj <- cell.filter(my.obj,
  min.mito = 0,
  max.mito = 0.80,
  min.genes = 200,
  max.genes = 4000,
  min.umis = 0,
  max.umis = Inf)
#
my.obj <- norm.data(my.obj, norm.method = "ranked.glsf", top.rank = 500)
# 
my.obj <- gene.stats(my.obj, which.data = "main.data")
#
make.gene.model(my.obj, 
    dispersion.limit = 1.5, 
    base.mean.rank = 500, 
    no.mito.model = T, 
    mark.mito = T, 
    interactive = F,
    out.name = "gene.model")

Normalize and merge ADTs with the main data to include them in the analysis

# my.obj <- norm.adt(my.obj) (fix this)
my.obj <- adt.rna.merge(my.obj)

my.obj <- run.pca(my.obj, clust.method = "gene.model", gene.list = readLines("my_model_genes.txt"), batch.norm = F)
#
my.obj <- run.clustering(my.obj, 
    clust.method = "ward.D", 
    dist.method = "euclidean",
    index.method = "kl",
    max.clust = 25,
    min.clust = 2,
    dims = 1:10)
#
my.obj <- run.pc.tsne(my.obj, dims = 1:10)
#
my.obj <- run.umap(my.obj, dims = 1:10, method = "naive")
#
my.obj <- clust.avg.exp(my.obj)
#
save(my.obj, file = "my.CITE.obj.Robj")

Visualize ADTs and genes

genelist = c("ADT_CD3","CD3E","ADT_CD11c","ITGAX","ADT_CD16","FCGR3A")

for(i in genelist){
    MyPlot <- gene.plot(my.obj, gene = i, 
        interactive = F,
        plot.data.type = "tsne",
        cell.transparency = 1)
    i <- gsub("-",".",i)
    eval(call("<-", as.name(i), MyPlot))
}

## 
library(gridExtra)
grid.arrange(ADT_CD3,CD3E,ADT_CD11c,ITGAX,ADT_CD16,FCGR3A, ncol = 2)

The rest of the analysis is as scRNA-Seq

scVDJ-Seq

Large Scale Bulk RNA-Seq

Read data

This data is already normalized

tcga.data <- read.delim("TCGA.BRCA.tsv",header=TRUE)
rownames(tcga.data) <- tcga.data$gene
tcga.data <- tcga.data[,-1]

Make iCellR object

my.obj <- make.obj(tcga.data)
my.obj@main.data <- my.obj@raw.data
#
my.obj <- gene.stats(my.obj, which.data = "main.data")
#
make.gene.model(my.obj, 
    dispersion.limit = 1000, 
    base.mean.rank = 500, 
    no.mito.model = T, 
    mark.mito = T, 
    interactive = F,
    out.name = "gene.model")
#
my.obj <- run.pca(my.obj, 
                  clust.method = "gene.model", 
                  gene.list = readLines("my_model_genes.txt"), 
                  batch.norm = F)
#
my.obj <- run.clustering(my.obj, 
    clust.method = "kmeans", 
    dist.method = "euclidean",
    index.method = "silhouette",
    max.clust = 25,
    min.clust = 2,
    dims = 1:10)
#
my.obj <- run.pc.tsne(my.obj, dims = 1:10)
#
my.obj <- run.umap(my.obj, dims = 1:10, method = "naive")
#
save(my.obj, file = "my.TCGA.obj.Robj")

visualize conditions

cluster.plot(my.obj,
                 plot.type = "tsne",
                 col.by = "conditions",
                 cell.size = 2,
                 cell.transparency = 1,
                 interactive = F)
#
cluster.plot(my.obj,
                 plot.type = "umap",
                 col.by = "conditions",
                 cell.size = 2,
                 cell.transparency = 1,
                 interactive = F)